*uses raw data, cleans and creates variables needed for analysis 



use "$raw/all_s4_noPII.dta", clear


******CLEANING*******
replace SCHOOL="DYNAMIC" if SCHOOL=="DYNAMIC SECONDARY SCHOOL"
replace Newfilm="MISS PEREGRINE" if Newfilm=="MISS PEREGRIE" | Newfilm=="MISS PERE" | Newfilm=="Miss Peregrine" | Newfilm=="mISS PEREGRIE" | Newfilm=="MISS PEREGRENE" | Newfilm=="Miss peregrine"
replace Newfilm="QUEEN OF KATWE" if Newfilm=="QWEEN OF KATWE"

*************LOOK AT AVERAGES***********
*tab SCHOOL Newfilm if MissingUCE!="yes"
tab SCHOOL Newfilm, row
tab SCHOOL, gen(school)

gen today=date("07/10/16", "DMY",2050)
replace today=date("08/10/16", "DMY",2050) if SCHOOL=="Makerere secondary school" | SCHOOL=="Paul mukasa secondary school" 
replace today=date("09/10/16", "DMY",2050) if SCHOOL=="ROYAL COLLEGE" | SCHOOL=="KULUMBA HS"
replace today=date("10/10/16", "DMY",2050) if SCHOOL=="hope boarding secondary school lutembe" | SCHOOL=="Mukono Parents high school" | SCHOOL=="KYANDONDO"
replace today=date("11/10/16", "DMY",2050) if SCHOOL=="DYNAMIC" | SCHOOL=="JAKAYZ S.S.S" | SCHOOL=="ATLAS" | SCHOOL=="GAYAZA MIXED ISLAMIC S.S.S"
format today %td
tab SCHOOL today

gen session=1 
replace session=2 if SCHOOL=="Makerere secondary school"
replace session=3 if SCHOOL=="Paul mukasa secondary school" 
replace session=4 if SCHOOL=="ROYAL COLLEGE" | SCHOOL=="KULUMBA HS"
replace session=5 if SCHOOL=="hope boarding secondary school lutembe" | SCHOOL=="Mukono Parents high school" 
replace session=6 if SCHOOL=="KYANDONDO"
replace session=7 if SCHOOL=="DYNAMIC" 
replace session=8 if SCHOOL=="JAKAYZ S.S.S" | SCHOOL=="ATLAS" | SCHOOL=="GAYAZA MIXED ISLAMIC S.S.S"
tab SCHOOL session

gen Q_large=0
replace Q_large=1 if session==1 | session==3 | session==6 | session==7 

tab today, gen(day)
gen day=1 if day1==1 
replace day=2 if day2==1
replace day=3 if day3==1
replace day=4 if day4==1
replace day=5 if day5==1



*controls
gen female=0 if GENDER!=""
replace female=1 if GENDER=="FEMALE"| GENDER=="female"
la var female "Female"
tab female
drop if AGE==39
gen S4=1 if regexm(S4S6,"4")
replace S4=0 if regexm(S4S6,"6")
*keep if MissingUCE!="yes"
drop if SCHOOL==""
*drop if AGE<15
*check dynamic 13 and 14 year olds 
sum AGE
la var AGE "Age"

tab SCHOOL 	Newfilm, row

*treatment
gen treatment=0 if Newfilm!=""
replace treatment=1 if Newfilm=="QUEEN OF KATWE"
la var treatment "Treatment"
drop if treatment==.
tab  SCHOOL treatment, row

*exams recoded
local exams ENG	MTC	HIST	GEOG	PHY	CHEM	BIO	COM	POL	LUG	CRE	IRE	ART	COMP	HSCI	LIT TED ENT  KIS IPS CST AGRIC  
foreach x of local exams {
replace `x'=18 if `x'==1
replace `x'=17 if `x'==2
replace `x'=16 if `x'==3
replace `x'=15 if `x'==4
replace `x'=14 if `x'==5
replace `x'=13 if `x'==6
replace `x'=12 if `x'==7
replace `x'=11 if `x'==8
replace `x'=10 if `x'==9
replace `x'=`x'-10
gen `x'_dummy=0
replace `x'_dummy=1 if `x'!=.
la var `x'_dummy "takes `x' "
tab `x'_dummy
}


tab MTC, gen(MTC_)
tab ENG, gen(ENG_)


egen subject_count=rownonmiss(`exams')
drop if subject_count==0

gen pass_count=0
foreach x of local exams {
replace pass_count=pass_count+1 if `x'>0 & `x'!=.
}
gen pass_min=0
replace pass_min=1 if pass_count>=3  & MTC!=0 & ENG!=0 

gen pass_max=0
replace pass_max=1 if pass_count>=6 & MTC!=0 & ENG!=0 & ((PHY!=0 &CHEM!=0) 	| ( BIO!=0 & PHY!=0) | (BIO!=0 & CHEM!=0))

foreach x of local exams {
gen `x'_credit=0
replace `x'_credit=1 if `x'>=3 & `x'!=.
}


*medians
egen median_subject=median(subject_count)
tab median_subject
gen less_median_subject=0
replace less_median_subject=1 if subject_count<median_subject
egen median_age=median(AGE)
tab median_age
gen above_median_age=0
replace above_median_age=1 if AGE>median_age


*school level variables
gen christian=0
replace christian=1 if school6==1 | school4==1 | school7==1 |school5==1 | school2==1

gen boarding_only=0
replace boarding_only=1 if school8==1 | school7==1 

gen top_500=0
replace top_500=1 if school7==1 | school8==1 | school6==1

gen top_1000=0
replace top_1000=1 if school7==1 | school8==1 | school6==1 | school4==1 | school5==1

gen high_fees=0
replace high_fees=1 if school8==1 | school6==1 

global school_controls christian boarding_only top_500 top_1000 high_fees

bysort SCHOOL: gen school_size=_N


*results 
egen total_score=rowtotal(`exams')
egen max_option=rowmax(HIST GEOG CRE	IRE)
egen core_score=rowtotal(ENG MTC BIO CHEM PHY max_option)

rowranks(`exams'), gen(exam_ranks1-exam_ranks22) highrank


foreach num of numlist 1/22 {
replace exam_ranks`num'=(subject_count+1-exam_ranks`num')
}

local i=1 
foreach x of local exams {
gen exam_`i'=`x'
local i=`i'+1 
}

gen rank_1=0
gen rank_2=0
gen rank_3=0
gen rank_4=0
gen rank_5=0
gen rank_6=0

gen count_rank1=0
gen count_rank2=0
gen count_rank3=0
gen count_rank4=0
gen count_rank5=0
gen count_rank6=0
foreach num of numlist 1/22 {
replace count_rank1=count_rank1+1 if exam_ranks`num'==1
replace count_rank2=count_rank2+1 if exam_ranks`num'==2
replace count_rank3=count_rank3+1 if exam_ranks`num'==3
replace count_rank4=count_rank4+1 if exam_ranks`num'==4
replace count_rank5=count_rank5+1 if exam_ranks`num'==5
replace count_rank6=count_rank6+1 if exam_ranks`num'==6
}

foreach num of numlist 1/22 {
replace rank_1=exam_`num' if exam_ranks`num'==1
replace rank_2=exam_`num' if exam_ranks`num'==2
replace rank_3=exam_`num' if exam_ranks`num'==3
replace rank_4=exam_`num' if exam_ranks`num'==4
replace rank_5=exam_`num' if exam_ranks`num'==5
replace rank_6=exam_`num' if exam_ranks`num'==6
}



gen best_score=.
replace best_score=6*rank_1 if count_rank1>=6

replace best_score=5*rank_1 +rank_6 if count_rank1==5
replace best_score=4*rank_1 + 2*rank_5 if count_rank1==4 & count_rank5>=2
replace best_score=4*rank_1 + rank_5 + rank_6 if count_rank1==4 & count_rank5<2

replace best_score=3*rank_1 + 3*rank_4 if count_rank1==3 & count_rank4>=3
replace best_score=3*rank_1 + 2*rank_4 + rank_5 if count_rank1==3 & count_rank4>=2
replace best_score=3*rank_1 + rank_4 + 2*rank_5 if count_rank1==3 & count_rank4==1 & count_rank5>=2
replace best_score=3*rank_1 + rank_4 + rank_5 + rank_6 if count_rank1==3 & count_rank4==1 & count_rank5==1 


*113333
replace best_score=2*rank_1 + 4*rank_3 if count_rank1==2 & count_rank3>=4
*113336
replace best_score=2*rank_1 + 3*rank_3 + rank_6 if count_rank1==2 & count_rank3==3
*113355
replace best_score=2*rank_1 + 2*rank_3 + 2*rank_5 if count_rank1==2 & count_rank3==2 & count_rank5>=2
*113356
replace best_score=2*rank_1 + 2*rank_3 + rank_5+rank_6 if count_rank1==2 & count_rank3==2 & count_rank5<2
*113446
replace best_score=2*rank_1 + rank_3 + 2*rank_4+rank_6 if count_rank1==2 & count_rank3==1 & count_rank4>=2
*113456
replace best_score=2*rank_1 + rank_3 + rank_4+rank_5+rank_6 if count_rank1==2 & count_rank3==1 & count_rank4==1 & count_rank5==1
*113455
replace best_score=2*rank_1 + rank_3 + rank_4+2*rank_5 if count_rank1==2 & count_rank3==1 & count_rank4==1 & count_rank5>=2
*113444
replace best_score=2*rank_1 + rank_3 + 3*rank_4 if count_rank1==2 & count_rank3==1 & count_rank4>=3

*123446
replace best_score=rank_1 + rank_2 + rank_3 + 2*rank_4 +rank_6  if count_rank1==1 & count_rank2==1 & count_rank3==1 & count_rank4==2 
*122255
replace best_score=rank_1 + 3*rank_2  + 2*rank_5  if count_rank1==1 & count_rank2==3 & count_rank5>1 
*122455
replace best_score=rank_1 + 2*rank_2 + rank_4 + 2*rank_5  if count_rank1==1 & count_rank2==2 & count_rank4==1 & count_rank5>1
*122222
replace best_score=rank_1 + 5*rank_2 if count_rank1==1 & count_rank2>=5
*122226
replace best_score=rank_1 + 4*rank_2 + rank_6 if count_rank1==1 & count_rank2==4
*122256
replace best_score=rank_1 + 3*rank_2 + rank_5 + rank_6 if count_rank1==1 & count_rank2==3 & count_rank5==1
*122456
replace best_score=rank_1 + 2*rank_2 + rank_4 + rank_5 + rank_6 if count_rank1==1 & count_rank2==2 & count_rank4==1 & count_rank5==1
*122446
replace best_score=rank_1 + 2*rank_2 + 2*rank_4 + rank_6 if count_rank1==1 & count_rank2==2 & count_rank4==2 
*122444
replace best_score=rank_1 + 2*rank_2 + 3*rank_4 if count_rank1==1 & count_rank2==2 & count_rank4>=2 
*123456 
replace best_score=rank_1 + rank_2 + rank_3 + rank_4 + rank_5 + rank_6 if count_rank1==1 & count_rank2==1 & count_rank3==1  & count_rank4==1 & count_rank5==1
*123444
replace best_score=rank_1 + rank_2 + rank_3 + 3*rank_4  if count_rank1==1 & count_rank2==1 & count_rank3==1 & count_rank4>=3
*123455
replace best_score=rank_1 + rank_2 + rank_3 + rank_4 + 2*rank_5 if count_rank1==1 & count_rank2==1 & count_rank3==1 & count_rank4==1 & count_rank5>=2
*123356
replace best_score=rank_1 + rank_2 + 2*rank_3 + rank_5 + rank_6 if count_rank1==1 & count_rank2==1 & count_rank3==2  & count_rank5==1
*123355
replace best_score=rank_1 + rank_2 + 2*rank_3 + 2*rank_5 if count_rank1==1 & count_rank2==1 & count_rank3==2 & count_rank5>=2
*123336
replace best_score=rank_1 + rank_2 + 3*rank_3 + rank_6 if count_rank1==1 & count_rank2==1 & count_rank3==3
*123333
replace best_score=rank_1 + rank_2 + 4*rank_3 if count_rank1==1 & count_rank2==1 & count_rank3>=3 




replace subject_count=8 if subject_count==7
replace subject_count=10 if subject_count==11


egen credit_count=rowtotal(*_credit)
gen division=5
replace division=4 if credit_count>=1 | pass_count>=2 & best_score>=3
replace division=3 if ((credit_count>=1 & pass_count>=6) | (credit_count>=2 & pass_count>=5)) & best_score>=12
replace division=2 if credit_count>=4 & pass_count>=6 & best_score>=22
replace division=1 if credit_count>=5 & pass_count>=6 & best_score>=30



foreach x of local exams {
egen mean_`x' = mean(`x') if treatment==0
egen sd_`x' = sd(`x') if treatment==0
egen mean_`x'2=max(mean_`x') // since no mean for treated group this copies for everyone control mean
egen sd_`x'2=max(sd_`x') // since no sd for treated group this copies for everyone control sd
gen std_`x' = (`x' - mean_`x'2) / sd_`x'2
}

egen std_total_score=rowtotal(std_*)
replace std_total_score=std_total_score/subject_count

egen std_max_option=rowmax(std_HIST std_GEOG std_CRE	std_IRE)
egen std_core_score=rowtotal(std_ENG std_MTC std_BIO std_CHEM std_PHY std_max_option)

egen std_total_score_exm=rowtotal(std_ENG std_HIST std_GEOG std_PHY	std_CHEM std_BIO std_COM std_POL std_LUG std_CRE std_IRE std_ART std_COMP std_HSCI std_LIT std_TED std_ENT std_KIS std_IPS std_CST std_AGRIC )
replace std_total_score_exm=std_total_score_exm/(subject_count-1)

//renormalise the aggregates
foreach x of varlist std_total_score std_max_option std_core_score std_total_score_exm {
egen mean_`x' = mean(`x') if treatment==0
egen sd_`x' = sd(`x') if treatment==0
egen mean_`x'2=max(mean_`x')
egen sd_`x'2=max(sd_`x')
gen `x'2 = (`x' - mean_`x'2) / sd_`x'2
}


replace RESULTS=RESULT if RESULTS==.
gen fail=1 if total_score<=6
replace fail=0 if fail==.


merge 1:1 id using  "$cleaned/all_s4_mock_cleaned.dta"

drop if _merge==2

drop _merge

bysort   treatment: sum std_MTC std_MTC_mock std_AGG_mock

_pctile AGG_mock if AGG_mock!=.
gen below_median_mock=0 if AGG_mock!=.
replace below_median_mock=1 if AGG_mock<r(r1)
la var below_median_mock "Below median mock"

_pctile MTC_mock if MTC_mock!=.
gen below_median_mock_mtc=0 if MTC_mock!=.
replace below_median_mock_mtc=1 if MTC_mock<r(r1)
la var below_median_mock "Below median mock maths"

drop AB AD AE AF AG AH AI AJ AK AL AM AN AO AP AQ AR AS AT AU AV

save "$cleaned/all_s4_cleaned.dta", replace




